# Select the pathways that are significant according to cumulative source/sink centrality, 
# introduced in:
# Naderi Yeganeh, Pourya, and M. Taghi Mostafavi. 2019. “Causal Disturbance Analysis: A Novel Graph Centrality Based Method for Pathway Enrichment Analysis.” IEEE/ACM Transactions on Computational Biology and Bioinformatics PP (March): 1–1. https://doi.org/10.1109/TCBB.2019.2907246.
# CADIA R package: https://github.com/pouryany/CADIA
# Pourya Naderi Yeganeh 2019-04-06

# CHANGES INTRODUCED: The functions 
# - pathSampler
# - processPathway
# - causalDisturbance
# were adapted to take as input signalling candidates and predict signed Metacore pathways
# Gaia Zaffaroni 2019-08-09

#--------------------------------------------------------------------------------------#

library(CADIA)
library(dplyr)

args = commandArgs(TRUE)
cutoff = args[1]
best = args[2]

pathways_adjs = readRDS("../data/Metacore_signedpathway_adjmatrix.Rdata")
p_names = read.table("../data/pathway_names.txt",sep = "\t",header=TRUE,stringsAsFactors=FALSE)
																																	  

# adapted to work with MetaCore pathways
MC_pathSampler <-  function(pathMat,deKID,iterationNo,alpha,
                         beta, statEval =1 ) {

	totPathNodes <- rownames(pathMat) # pathway nodes
	totGenes     <- length(totPathNodes) # size pathway
	sizeDE       <- sum(totPathNodes %in% deKID) # number of pathway nodes that are DE
	samplingData <- rep(0,iterationNo)
	deGenesInd   <- totPathNodes %in% deKID 
	deGenes      <- totPathNodes[deGenesInd] # DE pathway nodes
	deMatUnRef   <- pathMat[deGenesInd,deGenesInd] # adj mat of DE pathway nodes


	if (length(deMatUnRef) < 2) {
		causalDisturbance <- 1
		return(1)
	} else if (sizeDE == 0){
		return(1)
	}else{
	tryCatch({
		centr.mat <- source.sink.centrality(pathMat, alpha, beta)	# centrality of each node in pathway (independent of DEGs)
		paths.tot <- rowSums(centr.mat)
		cdist.tot <- rowSums(centr.mat[deGenesInd,]) # centrality of DEGs 
		paths.log <- sum(log2(paths.tot))
		cdist.log <- sum(log2(cdist.tot))

		causalDisturbance <- cdist.log/paths.log

		for (i in 1:iterationNo) { # random permutations of DEGs in pathway, calculate centrality for them
			randPerm <- logical(totGenes)
			posPerm  <- sample(1:totGenes, sizeDE,replace = F)
			randPerm[posPerm] = TRUE

			cdist.tot.rand  <- rowSums(centr.mat[randPerm,])
			cdist.tot.rand  <- sum(log2(cdist.tot.rand))

			samplingData[i] <- (cdist.tot.rand /paths.log)
		}

		sampleDist      <- stats::ecdf(unlist(samplingData))
		disturbProb     <- 1 - sampleDist(causalDisturbance)
		iter2 <- iterationNo

		if(disturbProb == 0)  {
			disturbProb <- 1/iterationNo
		}

		return(disturbProb)
		# return(list(samplingData, causalDisturbance))
		}, error = function(e) {
			disturbProb <- NA
			return(disturbProb)
		})
	}

}

# adapt to work with MetaCore pathways
MC_processPathway <- function(pGraph,Name, deKIDs, allKIDs, iter,
                           alpha,beta,statEval) {


    tempNodes   <- rownames(pGraph) %in% allKIDs
    nodeNums    <- rownames(pGraph)[tempNodes]
    isDiffExp   <- rownames(pGraph) %in% deKIDs
    isDiff      <- sum(isDiffExp)
    deSize      <- length(deKIDs)
    allSize     <- length(allKIDs)
    totPath     <- sum(tempNodes)


    fTestRes        <- stats::phyper(isDiff - 1, totPath, allSize - totPath,
                                     deSize, lower.tail = F)
    disturbProb     <- MC_pathSampler(pGraph, deKIDs, iter, alpha, beta, statEval)

    if(is.na(disturbProb)){
        causalDist <- fTestRes
    } else{
          causalDist <- stats::pchisq(-2 * sum(log(fTestRes), log(disturbProb)),
                                     df = 4, lower.tail = FALSE)
         }

    #cat("pathway done: ", Name,"\n")
    return(list(Name,nrow(pGraph),sum(pGraph),fTestRes,
                isDiff, disturbProb, causalDist))

}

# modified to use MC_processPathway, work with older mutate_at()
MC_causalDisturbance <- function(deIDs, allIDs, iter = 2000, alpha = 0.1,
                              beta =1,statEval = 1 , fdrMethod = "BH"){


    len     <- length(pathways_adjs)
    res     <- vector ("list", length = len)

    for ( i in 1:len ) {
        res[i] <- list(unlist(MC_processPathway(pathways_adjs[[i]],
                                             names(pathways_adjs)[i],
                                             deIDs, allIDs,iter,
                                             alpha,beta,statEval )))
	}										 

    res <- as.data.frame(res)
    # colnames(res)  <- names(pathways_adjs)
    rownames(res)  <- c("Name","nodes","edges","P_ORA",
                        "No. DE","P_SSC", "causal Disturbance")

    res <- as.data.frame(t(res))

    res.clean        <- res[!is.na(res$`P_SSC`),]
    res.clean$cadia  <- p.adjust(as.numeric(as.character(
                                            res.clean$`causal Disturbance`)),
                                 method = fdrMethod)

    res.clean$ORAFDR <- p.adjust(as.numeric(as.character
                                (res.clean$P_ORA)),method = fdrMethod)

    rownames(res.clean) <- NULL
    res.clean <- res.clean[order(res.clean$cadia),]

    res.clean[,5:9] <- mapply(as.character,res.clean[,5:9])
    res.clean[,5:9] <- mapply(as.numeric,res.clean[,5:9])
    res.clean[,5:9] <- mapply(formatC,res.clean[,5:9],
                                    MoreArgs = list(format = "e", digits = 3))

    res.clean[,5:9] <- mapply(as.numeric,res.clean[,5:9])
   
   res.clean$Name <- as.character(res.clean$Name)

    return(res.clean)
}

# -----RUN----- $

args = commandArgs(TRUE)
subfix=args[1]

candidates = read.table(sprintf("candidates-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",stringsAsFactors=FALSE,quote='"')
c_mols = candidates[,1]

jsd = read.table(sprintf("JSDs-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",stringsAsFactors=FALSE,quote='"')
all_mols = rownames(jsd)

set.seed(2019)
res = MC_causalDisturbance(c_mols, all_mols)

res$sign = ifelse(grepl("__inh",res$Name),"inhibition","activation")
res$Pathway = p_names[match(gsub("__inh","",res$Name),p_names$name),"completeName"]
res = res %>% select(Pathway,sign,everything())

write.table(res,sprintf("allSSC_candidates-cutoff%s-bestcomb%s_SignedMetacorePathways.txt",cutoff,best),sep="\t",row.names=FALSE,quote=FALSE)

pred = res %>% filter(P_SSC <= 0.05) %>% arrange(P_SSC)

write.table(pred,sprintf("pathways-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",row.names=FALSE,quote=FALSE)